Wczytanie danych
# Load the twelve raw CSV tables from the data/ directory.
# `header = TRUE` is written out in full: `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word. The comma separator is the
# read.csv() default, so it is not repeated.
colors_raw <- read.csv("data/colors.csv", header = TRUE)
elements_raw <- read.csv("data/elements.csv", header = TRUE)
inventories_raw <- read.csv("data/inventories.csv", header = TRUE)
inventory_minifigs_raw <- read.csv("data/inventory_minifigs.csv", header = TRUE)
inventory_parts_raw <- read.csv("data/inventory_parts.csv", header = TRUE)
inventory_sets_raw <- read.csv("data/inventory_sets.csv", header = TRUE)
minifigs_raw <- read.csv("data/minifigs.csv", header = TRUE)
part_categories_raw <- read.csv("data/part_categories.csv", header = TRUE)
part_relationships_raw <- read.csv("data/part_relationships.csv", header = TRUE)
parts_raw <- read.csv("data/parts.csv", header = TRUE)
sets_raw <- read.csv("data/sets.csv", header = TRUE)
themes_raw <- read.csv("data/themes.csv", header = TRUE)
Przetwarzanie brakujących danych
Colors
Brak modyfikacji surowych danych
# No cleaning step for colors: the cleaned table is the raw table unchanged.
colors_clean <- colors_raw
Elements
Usunięcie nadmiarowej kolumny design_id, która nie miała
wykorzystania w schemacie
# Drop the unused design_id column. It is removed by name rather than by
# position so a change in CSV column order cannot remove the wrong column;
# drop = FALSE guarantees the result stays a data frame.
elements_clean <- elements_raw[
  , setdiff(names(elements_raw), "design_id"),
  drop = FALSE
]
Inventories
Brak modyfikacji surowych danych
# No cleaning step for inventories: the raw table is carried over unchanged.
inventories_clean <- inventories_raw
Inventory Minifigs
Brak modyfikacji surowych danych
# No cleaning step for inventory minifigs: the raw table is carried over unchanged.
inventory_minifigs_clean <- inventory_minifigs_raw
Inventory Parts
Usunięcie nadmiarowej kolumny img_url, która nie podlega analizie
# Drop the img_url column, which is not analysed. It is removed by name rather
# than by position so a change in CSV column order cannot remove the wrong
# column; drop = FALSE guarantees the result stays a data frame.
inventory_parts_clean <- inventory_parts_raw[
  , setdiff(names(inventory_parts_raw), "img_url"),
  drop = FALSE
]
Inventory Sets
Brak modyfikacji surowych danych
# No cleaning step for inventory sets: the raw table is carried over unchanged.
inventory_sets_clean <- inventory_sets_raw
Minifigs
Usunięcie nadmiarowej kolumny img_url, która nie podlega analizie
# Drop the img_url column, which is not analysed. It is removed by name rather
# than by position so a change in CSV column order cannot remove the wrong
# column; drop = FALSE guarantees the result stays a data frame.
minifigs_clean <- minifigs_raw[
  , setdiff(names(minifigs_raw), "img_url"),
  drop = FALSE
]
Part Categories
Brak modyfikacji surowych danych
# No cleaning step for part categories: the raw table is carried over unchanged.
part_categories_clean <- part_categories_raw
Part Relationships
Brak modyfikacji surowych danych
# No cleaning step for part relationships: the raw table is carried over unchanged.
part_relationships_clean <- part_relationships_raw
Parts
Brak modyfikacji surowych danych
# No cleaning step for parts: the raw table is carried over unchanged.
parts_clean <- parts_raw
Sets
Usunięcie nadmiarowej kolumny img_url, która nie podlega analizie
# Drop the img_url column, which is not analysed. It is removed by name rather
# than by position so a change in CSV column order cannot remove the wrong
# column; drop = FALSE guarantees the result stays a data frame.
sets_clean <- sets_raw[
  , setdiff(names(sets_raw), "img_url"),
  drop = FALSE
]
Themes
Brak modyfikacji surowych danych
# No cleaning step for themes: the raw table is carried over unchanged.
themes_clean <- themes_raw
Podstawowe statystyki
Colors
# Render the first six rows of colors_clean as a formatted table.
knitr::kable(head(colors_clean, n = 6L))
| -1 |
[Unknown] |
0033B2 |
f |
| 0 |
Black |
05131D |
f |
| 1 |
Blue |
0055BF |
f |
| 2 |
Green |
237841 |
f |
| 3 |
Dark Turquoise |
008F9B |
f |
| 4 |
Red |
C91A09 |
f |
# Print a per-column summary of colors_clean.
skim(colors_clean)
Data summary
| Name |
colors_clean |
| Number of rows |
263 |
| Number of columns |
4 |
| _______________________ |
|
| Column type frequency: |
|
| character |
3 |
| numeric |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| name |
0 |
1 |
3 |
28 |
0 |
263 |
0 |
| rgb |
0 |
1 |
6 |
6 |
0 |
223 |
0 |
| is_trans |
0 |
1 |
1 |
1 |
0 |
2 |
0 |
Variable type: numeric
| id |
0 |
1 |
651.38 |
750.55 |
-1 |
83 |
1005 |
1070.5 |
9999 |
▇▁▁▁▁ |
Elements
# Render the first six rows of elements_clean as a formatted table.
knitr::kable(head(elements_clean, n = 6L))
| 6443403 |
2277c01pr0009 |
1 |
| 6300211 |
67906c01 |
14 |
| 4566309 |
2564 |
0 |
| 4275423 |
53657 |
1004 |
| 6194308 |
92926 |
71 |
| 6229123 |
26561 |
4 |
# Print a per-column summary of elements_clean.
skim(elements_clean)
Data summary
| Name |
elements_clean |
| Number of rows |
84138 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| part_num |
0 |
1 |
2 |
19 |
0 |
33765 |
0 |
Variable type: numeric
| element_id |
0 |
1 |
5222065.12 |
1596842.63 |
9327 |
4259774 |
6057754 |
6262025 |
61532443 |
▇▁▁▁▁ |
| color_id |
0 |
1 |
539.67 |
2044.86 |
-1 |
8 |
28 |
135 |
9999 |
▇▁▁▁▁ |
Inventories
# Render the first six rows of inventories_clean as a formatted table.
knitr::kable(head(inventories_clean, n = 6L))
| 1 |
1 |
7922-1 |
| 3 |
1 |
3931-1 |
| 4 |
1 |
6942-1 |
| 15 |
1 |
5158-1 |
| 16 |
1 |
903-1 |
| 17 |
1 |
850950-1 |
# Print a per-column summary of inventories_clean.
skim(inventories_clean)
Data summary
| Name |
inventories_clean |
| Number of rows |
37265 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| set_num |
0 |
1 |
3 |
20 |
0 |
35644 |
0 |
Variable type: numeric
| id |
0 |
1 |
61103.60 |
51380.10 |
1 |
14424 |
54379 |
88842 |
194312 |
▇▆▂▂▂ |
| version |
0 |
1 |
1.09 |
0.58 |
1 |
1 |
1 |
1 |
16 |
▇▁▁▁▁ |
Inventory Minifigs
# Render the first six rows of inventory_minifigs_clean as a formatted table.
knitr::kable(head(inventory_minifigs_clean, n = 6L))
| 3 |
fig-001549 |
1 |
| 4 |
fig-000764 |
1 |
| 19 |
fig-000555 |
1 |
| 25 |
fig-000574 |
1 |
| 26 |
fig-000842 |
1 |
| 26 |
fig-008641 |
1 |
# Print a per-column summary of inventory_minifigs_clean.
skim(inventory_minifigs_clean)
Data summary
| Name |
inventory_minifigs_clean |
| Number of rows |
20858 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| fig_num |
0 |
1 |
10 |
10 |
0 |
13455 |
0 |
Variable type: numeric
| inventory_id |
0 |
1 |
43010.44 |
52256.78 |
3 |
7869 |
15681 |
66834 |
194312 |
▇▁▁▁▁ |
| quantity |
0 |
1 |
1.06 |
0.78 |
1 |
1 |
1 |
1 |
100 |
▇▁▁▁▁ |
Inventory Parts
# Render the first six rows of inventory_parts_clean as a formatted table.
knitr::kable(head(inventory_parts_clean, n = 6L))
| 1 |
48379c01 |
72 |
1 |
f |
| 1 |
48395 |
7 |
1 |
f |
| 1 |
stickerupn0077 |
9999 |
1 |
f |
| 1 |
upn0342 |
0 |
1 |
f |
| 1 |
upn0350 |
25 |
1 |
f |
| 3 |
2343 |
47 |
1 |
f |
# Print a per-column summary of inventory_parts_clean.
skim(inventory_parts_clean)
Data summary
| Name |
inventory_parts_clean |
| Number of rows |
1180987 |
| Number of columns |
5 |
| _______________________ |
|
| Column type frequency: |
|
| character |
2 |
| numeric |
3 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| part_num |
0 |
1 |
1 |
20 |
0 |
51051 |
0 |
| is_spare |
0 |
1 |
1 |
1 |
0 |
2 |
0 |
Variable type: numeric
| inventory_id |
0 |
1 |
50849.46 |
55136.94 |
1 |
9404 |
22838 |
87088 |
194312 |
▇▂▁▂▁ |
| color_id |
0 |
1 |
131.78 |
862.38 |
-1 |
4 |
15 |
71 |
9999 |
▇▁▁▁▁ |
| quantity |
0 |
1 |
3.37 |
9.95 |
1 |
1 |
2 |
4 |
3064 |
▇▁▁▁▁ |
Inventory Sets
# Render the first six rows of inventory_sets_clean as a formatted table.
knitr::kable(head(inventory_sets_clean, n = 6L))
| 35 |
75911-1 |
1 |
| 35 |
75912-1 |
1 |
| 39 |
75048-1 |
1 |
| 39 |
75053-1 |
1 |
| 50 |
4515-1 |
1 |
| 50 |
4520-1 |
2 |
# Print a per-column summary of inventory_sets_clean.
skim(inventory_sets_clean)
Data summary
| Name |
inventory_sets_clean |
| Number of rows |
4358 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| set_num |
0 |
1 |
5 |
20 |
0 |
3171 |
0 |
Variable type: numeric
| inventory_id |
0 |
1 |
52518.95 |
59063.13 |
35 |
8076 |
16423 |
98685 |
191576 |
▇▁▁▂▁ |
| quantity |
0 |
1 |
1.81 |
5.67 |
1 |
1 |
1 |
1 |
60 |
▇▁▁▁▁ |
Minifigs
# Render the first six rows of minifigs_clean as a formatted table.
knitr::kable(head(minifigs_clean, n = 6L))
| fig-000001 |
Toy Store Employee |
4 |
| fig-000002 |
Customer Kid |
4 |
| fig-000003 |
Assassin Droid, White |
8 |
| fig-000004 |
Man, White Torso, Black Legs, Brown Hair |
4 |
| fig-000005 |
Captain America with Short Legs |
3 |
| fig-000006 |
Lloyd Avatar |
5 |
# Print a per-column summary of minifigs_clean.
skim(minifigs_clean)
Data summary
| Name |
minifigs_clean |
| Number of rows |
13764 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
2 |
| numeric |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| fig_num |
0 |
1 |
10 |
10 |
0 |
13764 |
0 |
| name |
0 |
1 |
1 |
148 |
0 |
13354 |
0 |
Variable type: numeric
| num_parts |
0 |
1 |
5.3 |
6.03 |
0 |
4 |
4 |
5 |
156 |
▇▁▁▁▁ |
Part Categories
# Render the first six rows of part_categories_clean as a formatted table.
knitr::kable(head(part_categories_clean, n = 6L))
| 1 |
Baseplates |
| 3 |
Bricks Sloped |
| 4 |
Duplo, Quatro and Primo |
| 5 |
Bricks Special |
| 6 |
Bricks Wedged |
| 7 |
Containers |
# Print a per-column summary of part_categories_clean.
skim(part_categories_clean)
Data summary
| Name |
part_categories_clean |
| Number of rows |
66 |
| Number of columns |
2 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
Variable type: numeric
| id |
0 |
1 |
35.36 |
19.41 |
1 |
19.25 |
35.5 |
51.75 |
68 |
▇▇▇▇▇ |
Part Relationships
# Render the first six rows of part_relationships_clean as a formatted table.
knitr::kable(head(part_relationships_clean, n = 6L))
| P |
3626cpr3662 |
3626c |
| P |
87079pr9974 |
87079 |
| P |
3960pr9971 |
3960 |
| R |
98653pr0003 |
98086pr0003 |
| R |
98653pr0003 |
98088pat0003 |
| R |
98653pr0003 |
98089pat0003 |
# Print a per-column summary of part_relationships_clean.
skim(part_relationships_clean)
Data summary
| Name |
part_relationships_clean |
| Number of rows |
29977 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
3 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| rel_type |
0 |
1 |
1 |
1 |
0 |
6 |
0 |
| child_part_num |
0 |
1 |
1 |
20 |
0 |
27139 |
0 |
| parent_part_num |
0 |
1 |
1 |
19 |
0 |
4725 |
0 |
Parts
# Render the first six rows of parts_clean as a formatted table.
knitr::kable(head(parts_clean, n = 6L))
| 003381 |
Sticker Sheet for Set 663-1 |
58 |
Plastic |
| 003383 |
Sticker Sheet for Sets 618-1, 628-2 |
58 |
Plastic |
| 003402 |
Sticker Sheet for Sets 310-3, 311-1, 312-3 |
58 |
Plastic |
| 003429 |
Sticker Sheet for Set 1550-1 |
58 |
Plastic |
| 003432 |
Sticker Sheet for Sets 357-1, 355-1, 940-1 |
58 |
Plastic |
| 003434 |
Sticker Sheet for Set 575-2, 653-1, 460-1 |
58 |
Plastic |
# Print a per-column summary of parts_clean.
skim(parts_clean)
Data summary
| Name |
parts_clean |
| Number of rows |
52615 |
| Number of columns |
4 |
| _______________________ |
|
| Column type frequency: |
|
| character |
3 |
| numeric |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| part_num |
0 |
1 |
1 |
20 |
0 |
52615 |
0 |
| name |
0 |
1 |
3 |
222 |
0 |
52103 |
0 |
| part_material |
0 |
1 |
4 |
16 |
0 |
7 |
0 |
Variable type: numeric
| part_cat_id |
0 |
1 |
38.91 |
22.08 |
1 |
17 |
41 |
60 |
68 |
▃▃▂▁▇ |
Sets
# Render the first six rows of sets_clean as a formatted table.
knitr::kable(head(sets_clean, n = 6L))
| 001-1 |
Gears |
1965 |
1 |
43 |
| 0011-2 |
Town Mini-Figures |
1979 |
67 |
12 |
| 0011-3 |
Castle 2 for 1 Bonus Offer |
1987 |
199 |
0 |
| 0012-1 |
Space Mini-Figures |
1979 |
143 |
12 |
| 0013-1 |
Space Mini-Figures |
1979 |
143 |
12 |
| 0014-1 |
Space Mini-Figures |
1979 |
143 |
2 |
# Print a per-column summary of sets_clean.
skim(sets_clean)
Data summary
| Name |
sets_clean |
| Number of rows |
21880 |
| Number of columns |
5 |
| _______________________ |
|
| Column type frequency: |
|
| character |
2 |
| numeric |
3 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| set_num |
0 |
1 |
3 |
20 |
0 |
21880 |
0 |
| name |
0 |
1 |
2 |
93 |
0 |
18752 |
0 |
Variable type: numeric
| year |
0 |
1 |
2007.76 |
13.96 |
1949 |
2001 |
2012 |
2018 |
2024 |
▁▁▁▃▇ |
| theme_id |
0 |
1 |
441.97 |
215.53 |
1 |
273 |
497 |
608 |
752 |
▃▃▃▇▇ |
| num_parts |
0 |
1 |
161.38 |
418.14 |
0 |
3 |
31 |
139 |
11695 |
▇▁▁▁▁ |
Themes
# Render the first six rows of themes_clean as a formatted table.
knitr::kable(head(themes_clean, n = 6L))
| 1 |
Technic |
NA |
| 3 |
Competition |
1 |
| 4 |
Expert Builder |
1 |
| 16 |
RoboRiders |
1 |
| 17 |
Speed Slammers |
1 |
| 18 |
Star Wars |
1 |
# Print a per-column summary of themes_clean.
skim(themes_clean)
Data summary
| Name |
themes_clean |
| Number of rows |
468 |
| Number of columns |
3 |
| _______________________ |
|
| Column type frequency: |
|
| character |
1 |
| numeric |
2 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
Variable type: numeric
| id |
0 |
1.00 |
433.46 |
216.55 |
1 |
250.5 |
466 |
625.25 |
752 |
▅▅▅▆▇ |
| parent_id |
145 |
0.69 |
360.64 |
197.19 |
1 |
186.0 |
411 |
512.50 |
697 |
▅▃▂▇▂ |